{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Box Plots"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following illustrates some of the enhanced box plot options in statsmodels: `violinplot` and `beanplot`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import statsmodels.api as sm"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Bean Plots"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following example is taken from the docstring of `beanplot`.\n",
    "\n",
    "We use the American National Election Survey 1996 dataset, which has Party\n",
    "Identification of respondents as independent variable and (among other\n",
    "data) age as dependent variable."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Party-identification categories; passed as the group labels of every plot below.\n",
    "labels = [\n",
    "    \"Strong Democrat\",\n",
    "    \"Weak Democrat\",\n",
    "    \"Independent-Democrat\",\n",
    "    \"Independent-Independent\",\n",
    "    \"Independent-Republican\",\n",
    "    \"Weak Republican\",\n",
    "    \"Strong Republican\",\n",
    "]\n",
    "# Integer party-ID codes 0..6, one per label.\n",
    "party_ID = np.arange(7)\n",
    "\n",
    "# ANES 1996 dataset, loaded through the pandas loader.\n",
    "data = sm.datasets.anes96.load_pandas()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Group age by party ID, and create a bean plot with it:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Figure defaults; set before any figure is created.\n",
    "plt.rcParams[\"figure.figsize\"] = (10.0, 8.0)  # make plot larger in notebook\n",
    "plt.rcParams[\"figure.subplot.bottom\"] = 0.23  # keep labels visible\n",
    "\n",
    "# One group of ages per party-ID code, in the order given by party_ID.\n",
    "age = [data.exog[\"age\"][data.endog == pid] for pid in party_ID]\n",
    "\n",
    "plot_opts = {\n",
    "    \"cutoff_val\": 5,\n",
    "    \"cutoff_type\": \"abs\",\n",
    "    \"label_fontsize\": \"small\",\n",
    "    \"label_rotation\": 30,\n",
    "}\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "sm.graphics.beanplot(age, ax=ax, labels=labels, plot_opts=plot_opts)\n",
    "ax.set_xlabel(\"Party identification of respondent.\")\n",
    "ax.set_ylabel(\"Age\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def beanplot(data, plot_opts=None, jitter=False):\n",
    "    \"\"\"Draw a bean plot of ``data`` on a new figure and return the figure.\n",
    "\n",
    "    Helper function to try out different plot options.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : sequence\n",
    "        Groups to plot; forwarded to ``sm.graphics.beanplot`` together with\n",
    "        the notebook-level ``labels`` list.\n",
    "    plot_opts : dict, optional\n",
    "        Options merged on top of the defaults defined in this function.\n",
    "        ``None`` (the default) means no overrides.\n",
    "    jitter : bool, optional\n",
    "        Forwarded unchanged to ``sm.graphics.beanplot``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    matplotlib.figure.Figure\n",
    "        The newly created figure, so that ``fig = beanplot(...)`` binds a\n",
    "        real figure instead of ``None``.\n",
    "    \"\"\"\n",
    "    fig, ax = plt.subplots()\n",
    "    # Defaults shared by all examples; caller-supplied options take precedence.\n",
    "    merged_opts = {\n",
    "        \"cutoff_val\": 5,\n",
    "        \"cutoff_type\": \"abs\",\n",
    "        \"label_fontsize\": \"small\",\n",
    "        \"label_rotation\": 30,\n",
    "    }\n",
    "    # `None` default avoids sharing one mutable dict between calls.\n",
    "    merged_opts.update(plot_opts or {})\n",
    "    sm.graphics.beanplot(\n",
    "        data, ax=ax, labels=labels, jitter=jitter, plot_opts=merged_opts\n",
    "    )\n",
    "    ax.set_xlabel(\"Party identification of respondent.\")\n",
    "    ax.set_ylabel(\"Age\")\n",
    "    return fig"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helper defaults, with jitter switched on.\n",
    "fig = beanplot(data=age, jitter=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Override the violin width and the violin fill colour.\n",
    "fig = beanplot(\n",
    "    age,\n",
    "    plot_opts={\n",
    "        \"violin_width\": 0.5,\n",
    "        \"violin_fc\": \"#66c2a5\",\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only the violin fill colour is overridden.\n",
    "fig = beanplot(\n",
    "    age,\n",
    "    plot_opts={\"violin_fc\": \"#66c2a5\"},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Override the bean size, the violin width and the violin fill colour.\n",
    "fig = beanplot(\n",
    "    age,\n",
    "    plot_opts={\n",
    "        \"bean_size\": 0.2,\n",
    "        \"violin_width\": 0.75,\n",
    "        \"violin_fc\": \"#66c2a5\",\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Jitter switched on, with the violin fill colour overridden.\n",
    "fig = beanplot(\n",
    "    age,\n",
    "    jitter=True,\n",
    "    plot_opts={\"violin_fc\": \"#66c2a5\"},\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Jitter switched on, with the violin width and fill colour overridden.\n",
    "fig = beanplot(\n",
    "    age,\n",
    "    jitter=True,\n",
    "    plot_opts={\n",
    "        \"violin_width\": 0.5,\n",
    "        \"violin_fc\": \"#66c2a5\",\n",
    "    },\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Advanced Box Plots"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Based on the example script `example_enhanced_boxplots.py` (by Ralf Gommers)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Repeats the imports and data setup from the first section, so this\n",
    "# section matches the stand-alone example script it is based on.\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import statsmodels.api as sm\n",
    "\n",
    "\n",
    "# Necessary to make horizontal axis labels fit\n",
    "plt.rcParams[\"figure.subplot.bottom\"] = 0.23\n",
    "\n",
    "# Party-identification categories, used as group labels in the plots.\n",
    "labels = [\n",
    "    \"Strong Democrat\",\n",
    "    \"Weak Democrat\",\n",
    "    \"Independent-Democrat\",\n",
    "    \"Independent-Independent\",\n",
    "    \"Independent-Republican\",\n",
    "    \"Weak Republican\",\n",
    "    \"Strong Republican\",\n",
    "]\n",
    "party_ID = np.arange(7)\n",
    "\n",
    "data = sm.datasets.anes96.load_pandas()\n",
    "\n",
    "# Group age by party ID.\n",
    "age = [data.exog[\"age\"][data.endog == pid] for pid in party_ID]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a violin plot.\n",
    "violin_opts = {\n",
    "    \"cutoff_val\": 5,\n",
    "    \"cutoff_type\": \"abs\",\n",
    "    \"label_fontsize\": \"small\",\n",
    "    \"label_rotation\": 30,\n",
    "}\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "sm.graphics.violinplot(age, ax=ax, labels=labels, plot_opts=violin_opts)\n",
    "\n",
    "ax.set_xlabel(\"Party identification of respondent.\")\n",
    "ax.set_ylabel(\"Age\")\n",
    "ax.set_title(\"US national election '96 - Age & Party Identification\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a bean plot.\n",
    "bean_opts = {\n",
    "    \"cutoff_val\": 5,\n",
    "    \"cutoff_type\": \"abs\",\n",
    "    \"label_fontsize\": \"small\",\n",
    "    \"label_rotation\": 30,\n",
    "}\n",
    "\n",
    "fig2, ax = plt.subplots()\n",
    "sm.graphics.beanplot(age, ax=ax, labels=labels, plot_opts=bean_opts)\n",
    "\n",
    "ax.set_xlabel(\"Party identification of respondent.\")\n",
    "ax.set_ylabel(\"Age\")\n",
    "ax.set_title(\"US national election '96 - Age & Party Identification\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create a jitter plot.\n",
    "# These options are also the starting point for the asymmetrical plot below.\n",
    "plot_opts = {\n",
    "    \"cutoff_val\": 5,\n",
    "    \"cutoff_type\": \"abs\",\n",
    "    \"label_fontsize\": \"small\",\n",
    "    \"label_rotation\": 30,\n",
    "    \"violin_fc\": (0.8, 0.8, 0.8),\n",
    "    \"jitter_marker\": \".\",\n",
    "    \"jitter_marker_size\": 3,\n",
    "    \"bean_color\": \"#FF6F00\",\n",
    "    \"bean_mean_color\": \"#009D91\",\n",
    "}\n",
    "\n",
    "fig3, ax = plt.subplots()\n",
    "sm.graphics.beanplot(\n",
    "    age,\n",
    "    ax=ax,\n",
    "    labels=labels,\n",
    "    jitter=True,\n",
    "    plot_opts=plot_opts,\n",
    ")\n",
    "\n",
    "ax.set_xlabel(\"Party identification of respondent.\")\n",
    "ax.set_ylabel(\"Age\")\n",
    "ax.set_title(\"US national election '96 - Age & Party Identification\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create an asymmetrical jitter plot: the lower-income group is drawn with\n",
    "# side=\"left\" and the higher-income group with side=\"right\" on the same axes.\n",
    "\n",
    "# Use dedicated names for the filtered data so the notebook-level `age`\n",
    "# (the list of per-party groups) is not overwritten by this cell.\n",
    "is_lower_income = data.exog[\"income\"] < 16  # incomes < $30k\n",
    "lower_age = data.exog[\"age\"][is_lower_income]\n",
    "lower_endog = data.endog[is_lower_income]\n",
    "age_lower_income = [lower_age[lower_endog == pid] for pid in party_ID]\n",
    "\n",
    "is_higher_income = data.exog[\"income\"] >= 20  # incomes > $50k\n",
    "higher_age = data.exog[\"age\"][is_higher_income]\n",
    "higher_endog = data.endog[is_higher_income]\n",
    "age_higher_income = [higher_age[higher_endog == pid] for pid in party_ID]\n",
    "\n",
    "# Build new option dicts instead of mutating the shared `plot_opts`, so\n",
    "# re-running this cell always starts from the same options.\n",
    "lower_opts = {\n",
    "    **plot_opts,\n",
    "    \"violin_fc\": (0.5, 0.5, 0.5),\n",
    "    \"bean_show_mean\": False,\n",
    "    \"bean_show_median\": False,\n",
    "    \"bean_legend_text\": \"Income < \\\\$30k\",\n",
    "    \"cutoff_val\": 10,\n",
    "}\n",
    "# The right-hand side keeps the left-hand settings except for the colours\n",
    "# and the legend text.\n",
    "higher_opts = {\n",
    "    **lower_opts,\n",
    "    \"violin_fc\": (0.7, 0.7, 0.7),\n",
    "    \"bean_color\": \"#009D91\",\n",
    "    \"bean_legend_text\": \"Income > \\\\$50k\",\n",
    "}\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "sm.graphics.beanplot(\n",
    "    age_lower_income,\n",
    "    ax=ax,\n",
    "    labels=labels,\n",
    "    side=\"left\",\n",
    "    jitter=True,\n",
    "    plot_opts=lower_opts,\n",
    ")\n",
    "sm.graphics.beanplot(\n",
    "    age_higher_income,\n",
    "    ax=ax,\n",
    "    labels=labels,\n",
    "    side=\"right\",\n",
    "    jitter=True,\n",
    "    plot_opts=higher_opts,\n",
    ")\n",
    "\n",
    "ax.set_xlabel(\"Party identification of respondent.\")\n",
    "ax.set_ylabel(\"Age\")\n",
    "ax.set_title(\"US national election '96 - Age & Party Identification\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
